{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "4a9ddcec-1c0a-4598-849e-5adb8a0e14b5",
   "metadata": {},
   "source": [
    "# 例子\n",
    "https://victorzhou.com/blog/intro-to-neural-networks/#3-training-a-neural-network-part-1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "5913a582-f72e-4a68-82fe-5758fc0b14cd",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 0 loss: 0.165\n",
      "Epoch 10 loss: 0.060\n",
      "Epoch 20 loss: 0.039\n",
      "Epoch 30 loss: 0.032\n",
      "Epoch 40 loss: 0.028\n",
      "Epoch 50 loss: 0.025\n",
      "Epoch 60 loss: 0.023\n",
      "Epoch 70 loss: 0.021\n",
      "Epoch 80 loss: 0.020\n",
      "Epoch 90 loss: 0.018\n",
      "Epoch 100 loss: 0.017\n",
      "Epoch 110 loss: 0.016\n",
      "Epoch 120 loss: 0.015\n",
      "Epoch 130 loss: 0.014\n",
      "Epoch 140 loss: 0.013\n",
      "Epoch 150 loss: 0.013\n",
      "Epoch 160 loss: 0.012\n",
      "Epoch 170 loss: 0.012\n",
      "Epoch 180 loss: 0.011\n",
      "Epoch 190 loss: 0.011\n",
      "Epoch 200 loss: 0.010\n",
      "Epoch 210 loss: 0.010\n",
      "Epoch 220 loss: 0.009\n",
      "Epoch 230 loss: 0.009\n",
      "Epoch 240 loss: 0.009\n",
      "Epoch 250 loss: 0.008\n",
      "Epoch 260 loss: 0.008\n",
      "Epoch 270 loss: 0.008\n",
      "Epoch 280 loss: 0.008\n",
      "Epoch 290 loss: 0.007\n",
      "Epoch 300 loss: 0.007\n",
      "Epoch 310 loss: 0.007\n",
      "Epoch 320 loss: 0.007\n",
      "Epoch 330 loss: 0.006\n",
      "Epoch 340 loss: 0.006\n",
      "Epoch 350 loss: 0.006\n",
      "Epoch 360 loss: 0.006\n",
      "Epoch 370 loss: 0.006\n",
      "Epoch 380 loss: 0.006\n",
      "Epoch 390 loss: 0.006\n",
      "Epoch 400 loss: 0.005\n",
      "Epoch 410 loss: 0.005\n",
      "Epoch 420 loss: 0.005\n",
      "Epoch 430 loss: 0.005\n",
      "Epoch 440 loss: 0.005\n",
      "Epoch 450 loss: 0.005\n",
      "Epoch 460 loss: 0.005\n",
      "Epoch 470 loss: 0.005\n",
      "Epoch 480 loss: 0.005\n",
      "Epoch 490 loss: 0.004\n",
      "Epoch 500 loss: 0.004\n",
      "Epoch 510 loss: 0.004\n",
      "Epoch 520 loss: 0.004\n",
      "Epoch 530 loss: 0.004\n",
      "Epoch 540 loss: 0.004\n",
      "Epoch 550 loss: 0.004\n",
      "Epoch 560 loss: 0.004\n",
      "Epoch 570 loss: 0.004\n",
      "Epoch 580 loss: 0.004\n",
      "Epoch 590 loss: 0.004\n",
      "Epoch 600 loss: 0.004\n",
      "Epoch 610 loss: 0.004\n",
      "Epoch 620 loss: 0.004\n",
      "Epoch 630 loss: 0.003\n",
      "Epoch 640 loss: 0.003\n",
      "Epoch 650 loss: 0.003\n",
      "Epoch 660 loss: 0.003\n",
      "Epoch 670 loss: 0.003\n",
      "Epoch 680 loss: 0.003\n",
      "Epoch 690 loss: 0.003\n",
      "Epoch 700 loss: 0.003\n",
      "Epoch 710 loss: 0.003\n",
      "Epoch 720 loss: 0.003\n",
      "Epoch 730 loss: 0.003\n",
      "Epoch 740 loss: 0.003\n",
      "Epoch 750 loss: 0.003\n",
      "Epoch 760 loss: 0.003\n",
      "Epoch 770 loss: 0.003\n",
      "Epoch 780 loss: 0.003\n",
      "Epoch 790 loss: 0.003\n",
      "Epoch 800 loss: 0.003\n",
      "Epoch 810 loss: 0.003\n",
      "Epoch 820 loss: 0.003\n",
      "Epoch 830 loss: 0.003\n",
      "Epoch 840 loss: 0.003\n",
      "Epoch 850 loss: 0.003\n",
      "Epoch 860 loss: 0.003\n",
      "Epoch 870 loss: 0.002\n",
      "Epoch 880 loss: 0.002\n",
      "Epoch 890 loss: 0.002\n",
      "Epoch 900 loss: 0.002\n",
      "Epoch 910 loss: 0.002\n",
      "Epoch 920 loss: 0.002\n",
      "Epoch 930 loss: 0.002\n",
      "Epoch 940 loss: 0.002\n",
      "Epoch 950 loss: 0.002\n",
      "Epoch 960 loss: 0.002\n",
      "Epoch 970 loss: 0.002\n",
      "Epoch 980 loss: 0.002\n",
      "Epoch 990 loss: 0.002\n",
      "Emily: 0.969\n",
      "Frank: 0.054\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "\n",
    "def sigmoid(x):\n",
    "  # Sigmoid activation function: f(x) = 1 / (1 + e^(-x))\n",
    "  return 1 / (1 + np.exp(-x))\n",
    "\n",
    "def deriv_sigmoid(x):\n",
    "  # Derivative of sigmoid: f'(x) = f(x) * (1 - f(x))\n",
    "  fx = sigmoid(x)\n",
    "  return fx * (1 - fx)\n",
    "\n",
    "def mse_loss(y_true, y_pred):\n",
    "  # y_true and y_pred are numpy arrays of the same length.\n",
    "  return ((y_true - y_pred) ** 2).mean()\n",
    "\n",
    "class OurNeuralNetwork:\n",
    "  '''\n",
    "  A neural network with:\n",
    "    - 2 inputs\n",
    "    - a hidden layer with 2 neurons (h1, h2)\n",
    "    - an output layer with 1 neuron (o1)\n",
    "\n",
    "  *** DISCLAIMER ***:\n",
    "  The code below is intended to be simple and educational, NOT optimal.\n",
    "  Real neural net code looks nothing like this. DO NOT use this code.\n",
    "  Instead, read/run it to understand how this specific network works.\n",
    "  '''\n",
    "  def __init__(self):\n",
    "    # Weights\n",
    "    self.w1 = np.random.normal()\n",
    "    self.w2 = np.random.normal()\n",
    "    self.w3 = np.random.normal()\n",
    "    self.w4 = np.random.normal()\n",
    "    self.w5 = np.random.normal()\n",
    "    self.w6 = np.random.normal()\n",
    "\n",
    "    # Biases\n",
    "    self.b1 = np.random.normal()\n",
    "    self.b2 = np.random.normal()\n",
    "    self.b3 = np.random.normal()\n",
    "\n",
    "  def feedforward(self, x):\n",
    "    # x is a numpy array with 2 elements.\n",
    "    h1 = sigmoid(self.w1 * x[0] + self.w2 * x[1] + self.b1)\n",
    "    h2 = sigmoid(self.w3 * x[0] + self.w4 * x[1] + self.b2)\n",
    "    o1 = sigmoid(self.w5 * h1 + self.w6 * h2 + self.b3)\n",
    "    return o1\n",
    "\n",
    "  def train(self, data, all_y_trues):\n",
    "    '''\n",
    "    - data is a (n x 2) numpy array, n = # of samples in the dataset.\n",
    "    - all_y_trues is a numpy array with n elements.\n",
    "      Elements in all_y_trues correspond to those in data.\n",
    "    '''\n",
    "    learn_rate = 0.1\n",
    "    epochs = 1000 # number of times to loop through the entire dataset\n",
    "\n",
    "    for epoch in range(epochs):\n",
    "      for x, y_true in zip(data, all_y_trues):\n",
    "        # --- Do a feedforward (we'll need these values later)\n",
    "        sum_h1 = self.w1 * x[0] + self.w2 * x[1] + self.b1\n",
    "        h1 = sigmoid(sum_h1)\n",
    "\n",
    "        sum_h2 = self.w3 * x[0] + self.w4 * x[1] + self.b2\n",
    "        h2 = sigmoid(sum_h2)\n",
    "\n",
    "        sum_o1 = self.w5 * h1 + self.w6 * h2 + self.b3\n",
    "        o1 = sigmoid(sum_o1)\n",
    "        y_pred = o1\n",
    "\n",
    "        # --- Calculate partial derivatives.\n",
    "        # --- Naming: d_L_d_w1 represents \"partial L / partial w1\"\n",
    "        d_L_d_ypred = -2 * (y_true - y_pred)\n",
    "\n",
    "        # Neuron o1\n",
    "        d_ypred_d_w5 = h1 * deriv_sigmoid(sum_o1)\n",
    "        d_ypred_d_w6 = h2 * deriv_sigmoid(sum_o1)\n",
    "        d_ypred_d_b3 = deriv_sigmoid(sum_o1)\n",
    "\n",
    "        d_ypred_d_h1 = self.w5 * deriv_sigmoid(sum_o1)\n",
    "        d_ypred_d_h2 = self.w6 * deriv_sigmoid(sum_o1)\n",
    "\n",
    "        # Neuron h1\n",
    "        d_h1_d_w1 = x[0] * deriv_sigmoid(sum_h1)\n",
    "        d_h1_d_w2 = x[1] * deriv_sigmoid(sum_h1)\n",
    "        d_h1_d_b1 = deriv_sigmoid(sum_h1)\n",
    "\n",
    "        # Neuron h2\n",
    "        d_h2_d_w3 = x[0] * deriv_sigmoid(sum_h2)\n",
    "        d_h2_d_w4 = x[1] * deriv_sigmoid(sum_h2)\n",
    "        d_h2_d_b2 = deriv_sigmoid(sum_h2)\n",
    "\n",
    "        # --- Update weights and biases\n",
    "        # Neuron h1\n",
    "        self.w1 -= learn_rate * d_L_d_ypred * d_ypred_d_h1 * d_h1_d_w1\n",
    "        self.w2 -= learn_rate * d_L_d_ypred * d_ypred_d_h1 * d_h1_d_w2\n",
    "        self.b1 -= learn_rate * d_L_d_ypred * d_ypred_d_h1 * d_h1_d_b1\n",
    "\n",
    "        # Neuron h2\n",
    "        self.w3 -= learn_rate * d_L_d_ypred * d_ypred_d_h2 * d_h2_d_w3\n",
    "        self.w4 -= learn_rate * d_L_d_ypred * d_ypred_d_h2 * d_h2_d_w4\n",
    "        self.b2 -= learn_rate * d_L_d_ypred * d_ypred_d_h2 * d_h2_d_b2\n",
    "\n",
    "        # Neuron o1\n",
    "        self.w5 -= learn_rate * d_L_d_ypred * d_ypred_d_w5\n",
    "        self.w6 -= learn_rate * d_L_d_ypred * d_ypred_d_w6\n",
    "        self.b3 -= learn_rate * d_L_d_ypred * d_ypred_d_b3\n",
    "\n",
    "      # --- Calculate total loss at the end of each epoch\n",
    "      if epoch % 10 == 0:\n",
    "        y_preds = np.apply_along_axis(self.feedforward, 1, data)\n",
    "        loss = mse_loss(all_y_trues, y_preds)\n",
    "        print(\"Epoch %d loss: %.3f\" % (epoch, loss))\n",
    "\n",
    "# Define dataset\n",
    "data = np.array([\n",
    "  [-2, -1],  # Alice\n",
    "  [25, 6],   # Bob\n",
    "  [17, 4],   # Charlie\n",
    "  [-15, -6], # Diana\n",
    "])\n",
    "all_y_trues = np.array([\n",
    "  1, # Alice\n",
    "  0, # Bob\n",
    "  0, # Charlie\n",
    "  1, # Diana\n",
    "])\n",
    "\n",
    "# Train our neural network!\n",
    "network = OurNeuralNetwork()\n",
    "network.train(data, all_y_trues)\n",
    "\n",
    "# Make some predictions\n",
    "emily = np.array([-7, -3]) # 128 pounds, 63 inches\n",
    "frank = np.array([20, 2])  # 155 pounds, 68 inches\n",
    "print(\"Emily: %.3f\" % network.feedforward(emily)) # 0.951 - F\n",
    "print(\"Frank: %.3f\" % network.feedforward(frank)) # 0.039 - M"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "fd1d2af3-778d-4ce8-88c1-39430d0eea6e",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Emily: 0.969\n",
      "Frank: 0.054\n"
     ]
    }
   ],
   "source": [
    "# Make some predictions\n",
    "emily = np.array([-7, -3]) # 128 pounds, 63 inches\n",
    "frank = np.array([20, 2])  # 155 pounds, 68 inches\n",
    "print(\"Emily: %.3f\" % network.feedforward(emily)) # 0.951 - F\n",
    "print(\"Frank: %.3f\" % network.feedforward(frank)) # 0.039 - M"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1153bf3e-15e2-4236-9b12-4e69b9a407bd",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.19"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
